import scrapy
import openpyxl
from scrapy import Selector, Request

from gwdang.items import NotebookItem

'''
pip freeze > requirements.txt
pip install -r requirements.txt
'''


class JdpppSpider(scrapy.Spider):
    """Scrape price, description and id data from JD mobile product pages.

    SKU ids are read from every cell of the active sheet of an Excel
    workbook, and one request per SKU is issued to
    ``https://item.m.jd.com/product/<sku>.html``.

    The workbook path defaults to ``sku_file`` and can be overridden per run
    with a spider argument, e.g.
    ``scrapy crawl jdppp -a sku_file=/path/to/skus.xlsx``.
    """

    name = 'jdppp'
    # The requests below target the mobile host, which is not a subdomain of
    # 'item.jd.com'; it must be listed too, otherwise the offsite filter can
    # drop them (depending on the Scrapy version this also applies to start
    # requests). The original entry is kept for backward compatibility.
    allowed_domains = ['item.jd.com', 'item.m.jd.com']
    # Default location of the workbook holding the SKU ids.
    sku_file = "/Users/pengwu/Documents/study/jd-text/SKU号.xlsx"

    @staticmethod
    def _sku_from_cell(value):
        """Return *value* as a digit-only SKU string, or ``None`` if it is not one.

        Accepts text cells as well as numeric cells, since spreadsheets often
        store SKU numbers as numbers; empty cells and anything else is rejected.
        """
        if isinstance(value, bool):
            # bool is a subclass of int, but True/False are never SKU ids.
            return None
        if isinstance(value, float) and value.is_integer():
            value = int(value)
        if isinstance(value, int):
            value = str(value)
        if not isinstance(value, str):
            # Covers empty cells (None), dates, non-integral floats, ...
            return None
        value = value.strip()
        # Negative numbers fail isdigit() because of the leading '-'.
        return value if value.isdigit() else None

    def start_requests(self):
        """Yield one product-page request for every SKU id in the workbook."""
        workbook = openpyxl.load_workbook(self.sku_file)
        for row in workbook.active.iter_rows():
            for cell in row:
                sku_id = self._sku_from_cell(cell.value)
                if sku_id is None:
                    continue
                yield Request(
                    url=f"https://item.m.jd.com/product/{sku_id}.html",
                    callback=self.parse,
                )

    def parse(self, response):
        """Extract one ``NotebookItem`` from a product page response.

        Each field is the first match of its CSS selector, or ``None`` when
        the page has no matching node.
        """
        notebook = NotebookItem()
        notebook['price'] = response.css('span#priceSale em::text').extract_first()
        notebook['desc'] = response.css('div#itemName::text').extract_first()
        notebook['id'] = response.css(
            'span#cheaperNotice::attr(report-pageparam)'
        ).extract_first()

        yield notebook
